function [ names, chromosomes, startSites, endSites ] = readCancerGenes(filename)
%READCANCERGENES Read gene names and genomic locations from a text table.
%   [NAMES, CHROMOSOMES, STARTSITES, ENDSITES] = READCANCERGENES(FILENAME)
%   reads the text file FILENAME, discards its first (header) line and
%   parses every following non-blank line. Space characters are stripped
%   from each line, after which the line is split into fields on the
%   remaining whitespace (tabs, line terminators).
%
%   Outputs (all N-by-1, one row per parsed line):
%     NAMES       - cell array holding the second field of each line.
%     CHROMOSOMES - chromosome number taken from a field of the form
%                   chr<id>:<start>-<end>; X is encoded as 23, Y as 24.
%     STARTSITES  - <start> as a number, or -1 when it is empty.
%     ENDSITES    - <end> as a number, or -1 when it is empty.
%
%   A line without any chr<id>:<start>-<end> field gets 0 in all three
%   numeric outputs. If several fields on one line match, the last match
%   wins. Blank lines are skipped. An empty or header-only file yields
%   empty outputs.
%
%   The total number of lines in the file (header included) is printed to
%   the command window as 'lines: N'.
%
%   Errors:
%     readCancerGenes:fileOpen      - FILENAME cannot be opened.
%     readCancerGenes:missingField  - a non-blank line has fewer than two
%                                     fields, so there is no name to read.

locPattern = 'chr(\d+|X|Y)\:(\d*)\-(\d*)';

% Define every output up front so that a header-only file returns empties
% instead of raising "output argument not assigned".
names = cell(0, 1);
chromosomes = zeros(0, 1);
startSites = zeros(0, 1);
endSites = zeros(0, 1);

[F, openMsg] = fopen(filename);
if F == -1
    error('readCancerGenes:fileOpen', ...
        'Cannot open "%s": %s', filename, openMsg);
end
% Close the file on every exit path, including errors raised below.
closeFile = onCleanup(@() fclose(F));

% The first line is a header; it is counted but otherwise ignored.
lineCount = 0;
headerLine = fgets(F);
if ischar(headerLine)
    lineCount = 1;
end

i = 0;
nextLine = fgets(F);
while ischar(nextLine)   % fgets returns the numeric -1 at end of file
    lineCount = lineCount + 1;

    % Strip spaces, then split into runs of non-whitespace characters.
    fields = regexp(strrep(nextLine, ' ', ''), '\S+', 'match');
    nextLine = fgets(F);

    if isempty(fields)
        continue;   % blank line: nothing to record
    end
    if numel(fields) < 2
        error('readCancerGenes:missingField', ...
            'Line %d of "%s" has no second field to use as the gene name.', ...
            lineCount, filename);
    end

    i = i + 1;
    names{i, 1} = fields{2};

    % Default row values; this keeps the numeric outputs the same length
    % as names even when the line carries no location field.
    chromosomes(i, 1) = 0;
    startSites(i, 1) = 0;
    endSites(i, 1) = 0;

    for j = 1:numel(fields)
        tokens = regexp(fields{j}, locPattern, 'tokens', 'once');
        if isempty(tokens)
            continue;
        end

        chromId = tokens{1};
        if strcmp(chromId, 'X')
            chromosomes(i, 1) = 23;
        elseif strcmp(chromId, 'Y')
            chromosomes(i, 1) = 24;
        else
            chromosomes(i, 1) = str2double(chromId);
        end

        % The start/end groups use \d*, so they may legitimately be empty.
        if isempty(tokens{2})
            startSites(i, 1) = -1;
        else
            startSites(i, 1) = str2double(tokens{2});
        end

        if isempty(tokens{3})
            endSites(i, 1) = -1;
        else
            endSites(i, 1) = str2double(tokens{3});
        end
    end
end

fprintf('lines: %d\n', lineCount);
end

